//Do-file to create dataset for the STR results
//Last changed by SAO 250109


***Depression
*Stage wave: depression score = row mean of the 11 CES-D items after rescaling each item to 0-1
use "D:\STR_RiskScores\Stata_new\STRStage\stg_cesd.dta", clear
keep LopNr DYSAPETI DYSCONTE DYSDEPRE DYSDISLI DYSHAPPY DYSSLEEP DYSLONEL DYSUNKIN DYSSAD DYSPROBL DYSSTART
*Codes 998 and 999 are set to missing
foreach name in DYSAPETI DYSCONTE DYSDEPRE DYSDISLI DYSHAPPY DYSSLEEP DYSLONEL DYSUNKIN DYSSAD DYSPROBL DYSSTART {
    replace `name' = . if inrange(`name', 998, 999)
}
*Reverse DYSCONTE and DYSHAPPY. The reversal 6-x maps raw 2..5 onto 4..1, i.e. it already
*includes the shift from 1-5 to 1-4 that the other items get below. Raw value 1 therefore has
*to be set to missing first, just as for the other items; otherwise it becomes 5 and ends up
*as 4/3 (outside the 0-1 range) after the rescaling.
*NOTE(review): assumes raw value 1 is not a valid answer on these two items either - confirm against the codebook
foreach name in DYSCONTE DYSHAPPY {
    replace `name' = . if `name'==1
    replace `name' = -1*`name'+6
}
*recode those running from 1-5 to 1-4 (drop 1)
foreach name in DYSAPETI DYSDEPRE DYSDISLI DYSSLEEP DYSLONEL DYSUNKIN DYSSAD DYSPROBL DYSSTART {
    replace `name' = . if `name'==1
    replace `name' = `name'-1
}
*Rescale 1-4 to 0-1
foreach name in DYSAPETI DYSCONTE DYSDEPRE DYSDISLI DYSHAPPY DYSSLEEP DYSLONEL DYSUNKIN DYSSAD DYSPROBL DYSSTART {
    replace `name' = (`name'-1)/3
}
*Row mean over all variables whose name starts with DYS
egen Depression = rowmean(DYS*)
keep LopNr Depression
tempfile StageDepression
save `StageDepression'

*Stage1 wave: same construction; here the items are reversed with 5-x and rescaled with (x-1)/3
use "D:\STR_RiskScores\Stata_new\STRStage1\stg1_cesd.dta", clear
keep LopNr DYSAPETI DYSDEPRE DYSPROBL DYSSLEEP DYSHAPPY DYSLONLY DYSUNKIN DYSCONTE DYSSAD DYSDISLI DYSSTART
local stg1items DYSAPETI DYSCONTE DYSDEPRE DYSDISLI DYSHAPPY DYSSLEEP DYSLONLY DYSUNKIN DYSSAD DYSPROBL DYSSTART
foreach item of local stg1items {
    *998/999 -> missing
    replace `item' = . if inrange(`item', 998, 999)
    *DYSCONTE and DYSHAPPY are reversed before rescaling
    if inlist("`item'", "DYSCONTE", "DYSHAPPY") {
        replace `item' = 5 - `item'
    }
    *Rescale to 0-1
    replace `item' = (`item'-1)/3
}
egen Depression = rowmean(DYS*)
keep LopNr Depression
tempfile Stage1Depression
save `Stage1Depression'

*Yatss wave: items carry the CES_DYS_ prefix; content and happy items are reversed with 5-x
use "D:\STR_RiskScores\Stata_new\STRYatss\yts_cesd.dta", clear
local ytsitems CES_DYS_APETITE CES_DYS_DEPRESSED CES_DYS_PROBLEMATIC CES_DYS_SLEEP CES_DYS_HAPPY CES_DYS_LONELY CES_DYS_UNKIND CES_DYS_CONTENT CES_DYS_SAD CES_DYS_DISLIKED CES_DYS_START
foreach item of local ytsitems {
    *998/999 -> missing
    replace `item' = . if inrange(`item', 998, 999)
    if inlist("`item'", "CES_DYS_CONTENT", "CES_DYS_HAPPY") {
        replace `item' = 5 - `item'
    }
    *Rescale to 0-1
    replace `item' = (`item'-1)/3
}
*Row mean over every variable whose name starts with CES (no keep was applied before this point)
egen Depression = rowmean(CES*)
keep LopNr Depression
tempfile YatssDepression
save `YatssDepression'

*The CESD battery was not delivered for SALT; the single question on ever having felt depressed
*for at least two weeks (or having used anti-depressants) is used instead
use "D:\STR_RiskScores\Stata_new\STRSalt\salt_dep.dta", clear
keep LopNr FELT_DEPRESSION
rename FELT_DEPRESSION Depression
*998/999 -> missing
replace Depression = . if inrange(Depression, 998, 999)
*Code 2 becomes 0, after which the variable is flipped (x -> 1-x)
replace Depression = 0 if Depression==2
replace Depression = 1 - Depression
tempfile SaltDepression
save `SaltDepression'

*Append depression files and use average depression if multiple obs
*Skip the SALT data --> the measure is too different and messes up the results
*use `SaltDepression', clear
*", clear" added so the block also runs when unsaved data are in memory (consistent with the other loads)
use `StageDepression', clear
append using `Stage1Depression'
append using `YatssDepression'
*Person-level mean over all appended waves
bysort LopNr: egen DEP = mean(Depression)
*Keep one row per person (LopNr and DEP are identical within person)
bysort LopNr: keep if _n==1
drop Depression
tempfile DEP
save `DEP'


***Physical activity
*Stage wave: PHACTLEV with 998/999 set to missing, rescaled with (x-1)/9
use "D:\STR_RiskScores\Stata_new\STRStage\stg_pa.dta", clear
replace PHACTLEV = . if inrange(PHACTLEV, 998, 999)
replace PHACTLEV = (PHACTLEV-1)/9
rename PHACTLEV Activity
tempfile StageActivity
save `StageActivity'

*Stage1 wave: same treatment
*", clear" added: every other load in this file uses it, and without it the command stops with
*an error whenever the data in memory have unsaved changes
use "D:\STR_RiskScores\Stata_new\STRStage1\stg1_phyact.dta", clear
replace PHACTLEV = . if inrange(PHACTLEV, 998, 999)
replace PHACTLEV = (PHACTLEV-1)/9
rename PHACTLEV Activity
tempfile Stage1Activity
save `Stage1Activity'

*Append activity files and use average activity if multiple obs
*NOTE(review): no keep is applied to the two source files, so any other variables they hold are carried along
use `StageActivity', clear
append using `Stage1Activity'
bysort LopNr: egen ACTIVITY = mean(Activity)
*Keep one row per person
bysort LopNr: keep if _n==1
drop Activity
tempfile ACTIVITY
save `ACTIVITY'


***Self-rated health
*Salty: code 99 -> missing, then reversed and rescaled with (21-x)/20
use "D:\STR_RiskScores\Stata_new\STRSalty\slty_sehea.dta", clear
keep LopNr SJALVUPPSKATTAD6
rename SJALVUPPSKATTAD6 SRH
replace SRH = . if SRH==99
*replace to 0-1 range
replace SRH = (21 - SRH)/20
tempfile SaltySRH
save `SaltySRH'

*Stage: HEALTH with 998/999 -> missing, reversed and rescaled with (5-x)/4
use "D:\STR_RiskScores\Stata_new\STRStage\stg_mh.dta", clear
keep LopNr HEALTH
rename HEALTH SRH
replace SRH = . if inrange(SRH, 998, 999)
*replace to 0-1 range
replace SRH = (5 - SRH)/4
tempfile StageSRH
save `StageSRH'

*Stage1: same variable name and same treatment
use "D:\STR_RiskScores\Stata_new\STRStage1\stg1_mh.dta", clear
keep LopNr HEALTH
rename HEALTH SRH
replace SRH = . if inrange(SRH, 998, 999)
*replace to 0-1 range
replace SRH = (5 - SRH)/4
tempfile Stage1SRH
save `Stage1SRH'

*Yatss: the item is called MHI_GENERAL_HEALTH_STAT; same treatment
use "D:\STR_RiskScores\Stata_new\STRYatss\yts_mh.dta", clear
keep LopNr MHI_GENERAL_HEALTH_STAT
rename MHI_GENERAL_HEALTH_STAT SRH
replace SRH = . if inrange(SRH, 998, 999)
*replace to 0-1 range
replace SRH = (5 - SRH)/4
tempfile YatssSRH
save `YatssSRH'

*Append SRH files and use average SRH if multiple obs
use `SaltySRH', clear
foreach wave in StageSRH Stage1SRH YatssSRH {
    append using ``wave''
}
*Person-level mean over all appended waves
egen meanSRH = mean(SRH), by(LopNr)
*Keep one row per person
bysort LopNr: keep if _n==1
drop SRH
rename meanSRH SRH

tempfile SRH
save `SRH'


***Neuroticism - CHECK WHETHER IT IS AVAILABLE IN MORE SUB-COHORTS
*SALT: ANXI with 998/999 set to missing (no rescaling here)
use "D:\STR_RiskScores\Stata_new\STRSalt\salt_anx.dta", clear
keep LopNr ANXI
rename ANXI NEURO
replace NEURO = . if inrange(NEURO, 998, 999)
tempfile NEURO
save `NEURO'


***Salty data
use "D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta", clear
keep LopNr MORAL1 ATTITYD1 ATTITYD25 ATTITYD31 ATTITYD33* ATTITYD34 ATTITYD38* ATTITYD39 ATTITYD40 PERSONLIGHET1_1 - PERSONLIGHET1_16
drop ATTITYD38_7

*Left-right scale and extremism
*Code 99 -> missing; Ideology is (x-1)/9, Extremism is the distance from 5.5 rescaled with (|x-5.5|-0.5)/4
replace ATTITYD25 = . if ATTITYD25==99
gen Ideology = (ATTITYD25-1)/9
gen Extremism = (abs(ATTITYD25-5.5)-0.5)/4

*Ideology using preference items - higher values for social and econ = more rightist/conservative
*Extremism using all 34 preference items
*Per item: code 9 -> missing (item 12 additionally code 6 -> missing), then STRk = |x-3|/2
forvalues q = 1/34 {
    replace ATTITYD33_`q' = . if ATTITYD33_`q'==9
    if `q'==12 {
        replace ATTITYD33_12 = . if ATTITYD33_12==6
    }
    gen STR`q' = (abs(ATTITYD33_`q'-3))/2
}
egen extremismSTR = rowmean(STR*)

*Social and economic ideology extremism
egen extremismSTRsocial = rowmean(STR17 STR23 STR24 STR25 STR26 STR27 STR28)
egen extremismSTRecon = rowmean(STR1 STR4 STR6 STR8 STR11 STR29)

*The per-item helper variables are no longer needed
drop STR*


*Policy attitude indices
*Step 1: reverse items 23, 27, 28 and 30 (x -> 6-x)
foreach q in 23 27 28 30 {
    replace ATTITYD33_`q' = 6 - ATTITYD33_`q'
}
*Step 2: rescale every item used in an index below with 10*(x-1)/4 (each item belongs to exactly one index)
foreach q in 17 23 24 25 26 27 28 1 4 6 8 11 29 18 20 21 22 30 31 32 33 34 7 13 14 {
    replace ATTITYD33_`q' = 10*(ATTITYD33_`q'-1)/4
}

*Immigrant policy attitudes
egen socialattitudes = rowmean(ATTITYD33_17 ATTITYD33_23 ATTITYD33_24 ATTITYD33_25 ATTITYD33_26 ATTITYD33_27 ATTITYD33_28)

*Economic policy attitudes
egen economicattitudes = rowmean(ATTITYD33_1 ATTITYD33_4 ATTITYD33_6 ATTITYD33_8 ATTITYD33_11 ATTITYD33_29)

*Environmental policy attitudes
egen environmentalattitudes = rowmean(ATTITYD33_18 ATTITYD33_20 ATTITYD33_21 ATTITYD33_22)

*Foreign policy attitudes
egen foreignpolicyattitudes = rowmean(ATTITYD33_30 ATTITYD33_31 ATTITYD33_32 ATTITYD33_33 ATTITYD33_34)

*Redistribution policy attitudes
egen redistributionattitudes = rowmean(ATTITYD33_7 ATTITYD33_13 ATTITYD33_14)


*Political participation - use only items available both in STR and AH
*Missing on items 1-6 is treated as 0
forvalues k = 1/6 {
    replace ATTITYD38_`k' = 0 if ATTITYD38_`k'==.
}
*egen PolPart = rowmean(ATTITYD38*)
*First generate one item for contacted official (politician (1) or civil servant (2))
gen Contact = cond(ATTITYD38_2==1, 1, ATTITYD38_1)
rename ATTITYD38_3 Rally
rename ATTITYD38_5 Contribute
egen PolPart = rowmean(Contact Rally Contribute)

*Missing codes: 9 for political interest, 99 for vote norm and the two efficacy items
replace ATTITYD31 = . if ATTITYD31==9
foreach item in ATTITYD34 ATTITYD39 ATTITYD40 {
    replace `item' = . if `item'==99
}

*Political interest (reversed and rescaled with (4-x)/3)
gen PolIntr = (4 - ATTITYD31)/3

*Vote norm
gen VoteNorm = (ATTITYD34-1)/9

*Efficacy
gen ExtEff = (ATTITYD39-1)/9
gen IntEff = (ATTITYD40-1)/9

*AMBI/Extraversion
*Per item: code 9 -> missing, items 4 6 9 13 15 16 reversed (x -> 4-x), then recode to 0-1 range with (x-1)/2
forvalues q = 1/16 {
    replace PERSONLIGHET1_`q' = . if PERSONLIGHET1_`q'==9
    if inlist(`q', 4, 6, 9, 13, 15, 16) {
        replace PERSONLIGHET1_`q' = 4 - PERSONLIGHET1_`q'
    }
    replace PERSONLIGHET1_`q' = (PERSONLIGHET1_`q'-1)/2
}
egen EXTRA = rowmean(PERSONLIGHET1_1 - PERSONLIGHET1_16)

*Subjective wellbeing: code 9 -> missing, then reversed (x -> 5-x); not rescaled here
rename ATTITYD1 SWB
replace SWB = . if SWB==9
replace SWB = 5 - SWB

*Risk attitudes: code 99 -> missing, then recode to 0-1 range
rename MORAL1 RISK
replace RISK = . if RISK==99
replace RISK = (RISK-1)/9



*Keep the constructed Salty variables and store them for the final merge
keep LopNr PolPart PolIntr VoteNorm ExtEff IntEff Ideology Contact Rally Contribute Extremism EXTRA SWB RISK socialattitudes economicattitudes environmentalattitudes foreignpolicyattitudes redistributionattitudes extremismSTR extremismSTRsocial extremismSTRecon
tempfile Salty
save `Salty'



***Conscription data
*Each of the three sources is reduced to one row per person holding the person's maximum score as IQ

*Source 1: insark (pprf_pgrp is forced to numeric first)
use "D:\STR_RiskScores\Stata_new\Monstring\insark.dta", clear
keep LopNr pprf_pgrp
destring pprf_pgrp, replace force
egen IQ = max(pprf_pgrp), by(LopNr)
bysort LopNr: keep if _n==1
drop pprf_pgrp
tempfile Insark
save `Insark'

*Source 2: rekryteringsmyndigheten (gkap)
use "D:\STR_RiskScores\Stata_new\Monstring\rekryteringsmyndigheten.dta", clear
keep LopNr gkap
egen IQ = max(gkap), by(LopNr)
bysort LopNr: keep if _n==1
drop gkap
tempfile Rekryteringsmyndigheten
save `Rekryteringsmyndigheten'

*Source 3: lev_so__miq_data (Total__1_9_)
use "D:\STR_RiskScores\Stata_new\Monstring\lev_so__miq_data.dta", clear
keep LopNr Total__1_9_
egen IQ = max(Total__1_9_), by(LopNr)
bysort LopNr: keep if _n==1
drop Total__1_9_
tempfile MagnusData
save `MagnusData'

*Stack the other two sources onto the third (still in memory)
append using `Insark'
append using `Rekryteringsmyndigheten'

*Maximum across sources, one row per person
egen CP = max(IQ), by(LopNr)
bysort LopNr: keep if _n==1
drop IQ
*Scores of 0 are treated as missing; persons without a score are dropped
drop if CP==0 | CP==.

tempfile CP
save `CP'


***Turnout data
*1970: variables r, k, l become RostR1970, RostK1970, RostL1970
use "D:\STR_RiskScores\Stata_new\VD\Lev_valdelt_1970.dta", clear
keep LopNr r k l
foreach lvl in R K L {
    local src = lower("`lvl'")
    rename `src' Rost`lvl'1970
}
*Recode: 3 -> missing, 1 -> 0, 2 and 4-6 -> 1
recode RostR1970 RostK1970 RostL1970 (3=.) (1=0) (2=1) (4/6=1)
tempfile RostRKL1970
save `RostRKL1970'


*1994: variables r, k, l become RostR1994, RostK1994, RostL1994
use "D:\STR_RiskScores\Stata_new\VD\Lev_valdelt_1994rkl.dta", clear
keep LopNr r k l
foreach lvl in R K L {
    local src = lower("`lvl'")
    rename `src' Rost`lvl'1994
}
*Recode: 3 -> missing, 1 -> 0, 2 and 4-6 -> 1
recode RostR1994 RostK1994 RostL1994 (3=.) (1=0) (2=1) (4/6=1)
tempfile RostRKL1994
save `RostRKL1994'


*1994 file "f": variable f becomes RostEU
use "D:\STR_RiskScores\Stata_new\VD\Lev_valdelt_1994f.dta", clear
keep LopNr f
rename f RostEU
*Recode: 3 -> missing, 1 -> 0, 2 and 4-6 -> 1
recode RostEU (3=.) (1=0) (2=1) (4/6=1)
tempfile RostEU1994
save `RostEU1994'


*2009: variable e becomes RostEP2009
use "D:\STR_RiskScores\Stata_new\VD\vd09.dta", clear
keep LopNr e
rename e RostEP2009
*Recode: 3 -> missing, 1 -> 0, 2 and 4-6 -> 1
recode RostEP2009 (3=.) (1=0) (2=1) (4/6=1)
tempfile RostEP2009
save `RostEP2009'


*2010: variables r, k, l become RostR2010, RostK2010, RostL2010
use "D:\STR_RiskScores\Stata_new\VD\vd10.dta", clear
keep LopNr r k l
foreach lvl in R K L {
    local src = lower("`lvl'")
    rename `src' Rost`lvl'2010
}
*Recode: 3 -> missing, 1 -> 0, 2 and 4-6 -> 1
recode RostR2010 RostK2010 RostL2010 (3=.) (1=0) (2=1) (4/6=1)
tempfile RostRKL2010
save `RostRKL2010'


*2018: rrost, krost, lrost become RostR2018, RostK2018, RostL2018; the values are not recoded here
use "D:\STR_RiskScores\Stata_new\VD\Lev_vd18.dta", clear
keep LopNr rrost krost lrost Rostratt
rename rrost RostR2018
rename krost RostK2018
rename lrost RostL2018
*Blank out by Rostratt: R is set to missing when Rostratt==3, K and L when Rostratt==2
replace RostR2018 = . if Rostratt==3
foreach lvl in K L {
    replace Rost`lvl'2018 = . if Rostratt==2
}
drop Rostratt
tempfile RostRKL2018
save `RostRKL2018'


*2019: Eurost becomes RostEP2019 (values are used as delivered)
use "D:\STR_RiskScores\Stata_new\VD\EU2019.dta", clear
keep LopNr Eurost
rename Eurost RostEP2019
tempfile RostEP2019
save `RostEP2019'


*Education from census1970 and LISA (EduYears defined according to first SUN/ISCED digit)
use "D:\STR_RiskScores\Stata_new\FoB\FoB70.dta", clear
keep LopNr UtbNiva
*Years of education assigned to UtbNiva levels 1-7, in that order; any other value stays missing
gen EduYears1970 = .
forvalues lev = 1/7 {
    local yrs : word `lev' of 7 10 13 13 15 19 22
    replace EduYears1970 = `yrs' if UtbNiva==`lev'
}
drop UtbNiva
tempfile EduYears1970
save `EduYears1970'


*One education file per LISA year: Sun2000niva is mapped to years of education
*(codes outside the listed ranges stay missing)
forvalues year = 1990/2018 {
    use "D:\STR_RiskScores\Stata_new\LISA\LISA_`year'.dta", clear
    keep LopNr Sun2000niva
    gen Eduyears`year' = 7 if Sun2000niva<200
    *200-299 -> 10 years (this range already contains code 204)
    replace Eduyears`year' = 10 if inrange(Sun2000niva, 200, 299)
    *310-319, 320-329, 330-339 -> 13 years
    foreach lo in 310 320 330 {
        replace Eduyears`year' = 13 if inrange(Sun2000niva, `lo', `lo'+9)
    }
    *410-419 -> 15 years
    replace Eduyears`year' = 15 if inrange(Sun2000niva, 410, 419)
    *520-529, 530-539, 540-549, 550-559 -> 19 years
    foreach lo in 520 530 540 550 {
        replace Eduyears`year' = 19 if inrange(Sun2000niva, `lo', `lo'+9)
    }
    *600-629 and 640-649 -> 22 years
    replace Eduyears`year' = 22 if inrange(Sun2000niva, 600, 629)
    replace Eduyears`year' = 22 if inrange(Sun2000niva, 640, 649)
    drop Sun2000niva
    tempfile EduYears`year'
    save `EduYears`year''
}


*Income data
*Census files 1970, 1975 and 1985: ArbInk becomes Income<year>; only the 1985 value is multiplied by 100
foreach yy in 70 75 85 {
    use "D:\STR_RiskScores\Stata_new\FoB\FoB`yy'.dta", clear
    keep LopNr ArbInk
    rename ArbInk Income19`yy'
    if `yy'==85 {
        replace Income1985 = Income1985*100
    }
    tempfile Income19`yy'
    save `Income19`yy''
}

*One income file per LISA year: ForvErs becomes Income<year>, multiplied by 100
forvalues year = 1990/2018 {
    use "D:\STR_RiskScores\Stata_new\LISA\LISA_`year'.dta", clear
    keep LopNr ForvErs
    *Keep only persons that occur exactly once in the year file
    bysort LopNr: keep if _N==1
    rename ForvErs Income`year'
    replace Income`year' = Income`year'*100
    tempfile Income`year'
    save `Income`year''
}


***PGI
*Keep the single-trait PGIs plus every variable starting with PC and the batch variable
use "D:\STR_RiskScores\Stata_new\PGS\repository_1.dta", clear
keep LopNr PGI_EA_single PGI_ADVENTURE_single PGI_CP_single PGI_NEURO_single PGI_RISK_single PGI_EXTRA_single PGI_MORNING_single PGI_BMI_single PGI_DEP_single PGI_HEIGHT_single PGI_SELFHEALTH_single PGI_ACTIVITY_single PGI_SWB_single PC* batch
*PGI_<trait>_single -> <trait>_PGI
foreach trait in EA CP ADVENTURE NEURO RISK EXTRA MORNING DEP HEIGHT ACTIVITY BMI SWB {
    rename PGI_`trait'_single `trait'_PGI
}
*The self-rated health PGI gets the short name used for the phenotype
rename PGI_SELFHEALTH_single SRH_PGI
tempfile PGI
save `PGI'


*Create sample
use "D:\STR_RiskScores\Stata_new\STR\cohort_combined.dta", clear
*The delivered Byear is kept as Bmonth; the new Byear is floor(Bmonth/100)
*NOTE(review): presumably the delivered value is YYYYMM - confirm
destring Byear, replace
rename Byear Bmonth
gen Byear = floor(Bmonth/100)

*Keep only persons found in the parent register and attach the parents' identifiers
merge 1:1 LopNr using "D:\STR_RiskScores\Stata_new\FlerGen\Foraldrar_new.dta", keepusing(LopNrMor LopNrFar)
keep if _merge==3
drop _merge

*Drop if foreign background
*merge 1:1 LopNr using "D:\STR_RiskScores\Stata_new\FlerGen\FodelseUppg.dta", keepusing(UtlSvBakg)
*The IFAU file is keyed on LopNr, so each parent identifier is temporarily renamed to LopNr
*for its lookup; rows with ifau in 40-53 are dropped
rename LopNr aLopNr
foreach parent in Mor Far {
    rename LopNr`parent' LopNr
    merge m:1 LopNr using "D:\STR_RiskScores\Stata_new\FlerGen\IFAU.dta"
    keep if _merge!=2
    drop _merge
    drop if inrange(ifau, 40, 53)
    drop ifau
    rename LopNr LopNr`parent'
}
*Same lookup and exclusion for the person's own identifier
rename aLopNr LopNr
merge 1:1 LopNr using "D:\STR_RiskScores\Stata_new\FlerGen\IFAU.dta"
keep if _merge!=2
drop _merge
drop if inrange(ifau, 40, 53)
drop ifau



*Merge all files
*Only persons with PGI data stay in the sample
merge 1:1 LopNr using `PGI'
keep if _merge==3
drop _merge

*Turnout files: keep every sample member, discard rows that exist only in the merged-in file
foreach src in RostRKL1970 RostRKL1994 RostRKL2010 RostRKL2018 RostEU1994 RostEP2009 RostEP2019 {
    merge 1:1 LopNr using ``src''
    drop if _merge==2
    drop _merge
}

merge 1:1 LopNr using `Salty'
drop if _merge==2
*Generate indicator for salty sample
gen SALTY = 1 if _merge==3
drop _merge

*Phenotype files
foreach src in CP NEURO SRH DEP ACTIVITY {
    merge 1:1 LopNr using ``src''
    drop if _merge==2
    drop _merge
}

*Education: the census year plus selected LISA years
foreach year in 1970 1990 2000 2010 2015 2018 {
    merge 1:1 LopNr using `EduYears`year''
    drop if _merge==2
    drop _merge
}

*Income: census years 1970, 1975, 1985 followed by every LISA year 1990-2018
foreach year of numlist 1970 1975 1985 1990/2018 {
    merge 1:1 LopNr using `Income`year''
    drop if _merge==2
    drop _merge
}


*Average income between 25-65
*2018 prices
*Each year's income is multiplied by 1875 and divided by that year's index value below
*(Income2018 is left unchanged)
numlist "1970 1975 1985 1990/2017"
local incyears `r(numlist)'
local incindex 236 347 878 1187 1297 1327 1389 1419 1455 1462 1469 1467 1474 1489 1525 1558 1588 1594 1601 1623 1659 1716 1711 1733 1778 1794 1793 1790 1789 1807 1839
local nyears : word count `incyears'
forvalues j = 1/`nyears' {
    local yr : word `j' of `incyears'
    local idx : word `j' of `incindex'
    replace Income`yr' = 1875*Income`yr'/`idx'
}


*Income<age> = income observed in the year the person had that age (birth year + age);
*it stays missing when no income file exists for that year
forvalues age = 25/65 {
    gen Income`age' = .
    foreach yr of numlist 1970 1975 1985 1990/2018 {
        replace Income`age' = Income`yr' if Byear==(`yr'-`age')
    }
}

*Mean over the available ages 25-65 and its log
egen Income = rowmean(Income25-Income65)
gen lnIncome = ln(Income+1)
*Drop the yearly and the age-specific income variables
drop Income1* Income2* Income3* Income4* Income5* Income6*



*Own turnout
*Row means over the available elections; the R series is stored as Turnout1 and the
*EP series as Turnout2
egen Turnout1 = rowmean(RostR1970 RostR1994 RostR2010 RostR2018)
egen TurnoutMun = rowmean(RostK1970 RostK1994 RostK2010 RostK2018)
egen TurnoutReg = rowmean(RostL1970 RostL1994 RostL2010 RostL2018)
egen Turnout2 = rowmean(RostEP2009 RostEP2019)


*Own EA
*Highest value observed across the merged education years
egen EduYears = rowmax(EduYears1970 Eduyears1990 Eduyears2000 Eduyears2010 Eduyears2015 Eduyears2018)
drop EduYears1* Eduyears2*



*Standardize PGIs by birth year
*std<name> = (value - birth-year mean) / birth-year standard deviation
foreach pgi in EA_PGI ADVENTURE_PGI CP_PGI EXTRA_PGI MORNING_PGI NEURO_PGI RISK_PGI DEP_PGI HEIGHT_PGI SRH_PGI ACTIVITY_PGI BMI_PGI SWB_PGI {
    egen meanPGI = mean(`pgi'), by(Byear)
    egen sdPGI = sd(`pgi'), by(Byear)
    gen std`pgi' = (`pgi'-meanPGI)/sdPGI
    drop meanPGI sdPGI
}


*Dummy for male
*Computed as 2-SEX
gen MALE = 2 - SEX
drop SEX

*Final variable selection (LopNr itself is not kept; LopNrParID is, and is renamed to PairID below)
keep Turnout1 Turnout2 PolPart VoteNorm PolIntr ExtEff IntEff Ideology Extremism Contact Rally Contribute ///
    stdEA_PGI stdCP_PGI stdEXTRA_PGI stdNEURO_PGI stdRISK_PGI stdADVENTURE_PGI stdMORNING_PGI stdSRH_PGI ///
    stdDEP_PGI stdSWB_PGI stdACTIVITY_PGI stdBMI_PGI stdHEIGHT_PGI MALE Byear PC1-PC10 LopNrParID ///
    CP EXTRA NEURO RISK SRH DEP SWB ACTIVITY EduYears lnIncome SALTY BESTZYG ///
    socialattitudes economicattitudes environmentalattitudes foreignpolicyattitudes redistributionattitudes ///
    extremismSTR extremismSTRsocial extremismSTRecon batch ///
    EA_PGI ADVENTURE_PGI CP_PGI EXTRA_PGI MORNING_PGI NEURO_PGI RISK_PGI DEP_PGI HEIGHT_PGI SRH_PGI ACTIVITY_PGI BMI_PGI SWB_PGI


*Standardize all mediators
*zscore creates z_<name>; the raw variable is dropped and the z-score takes over its name
foreach med in EduYears EXTRA NEURO RISK SRH DEP SWB ACTIVITY CP {
    zscore `med'
    drop `med'
    rename z_`med' `med'
}

*Constant identifiers attached to every row of this dataset
gen SchoolID = 0
gen sample = "STR"

rename LopNrParID PairID

save "E:\ProjData\PGS and politics/STRdata.dta", replace

